home *** CD-ROM | disk | FTP | other *** search
Text File | 1993-09-16 | 11.9 KB | 274 lines | [TEXT/CCL2] |
- ;-*- Mode: Lisp; Package: CCL -*-
- ; boyer-moore.lisp
- ;
- ; The Boyer/Moore string search algorithm.
- ; Replaces MCL's "Search Files" algorithm with Boyer/Moore and gives feedback
- ; while the search is progressing.
-
- ; To do
- ; Only finds one occurrence per block.
-
- ;;;;;;;;;;;;;
- ;;
- ;; Modification History
- ;;
- ;; 04/28/93 mwp Release
- ;; ?? bill no longer fails to find string that crosses the first block boundary.
- ;; 02/04/93 bill put up dialog before (directory ...) so that user can abort by
- ;; closing the dialog. Gracefully handle user closing of the dialog.
- ;; 02/02/93 bill (directory ... :resolve-aliases t). Maybe this should be a switch.
- ;; 10/16/92 bill The behavior of the selection changed in set-table-sequence.
- ;; Bullet-proof this code so it works independently of how
- ;; the selection changes.
- ;; 08/20/92 bill double clicking on white space in the "Files containing..."
- ;; dialog no longer brings up a "New..." window.
- ;; ------------- 2.0
- ;; 10/08/91 bill mac-file-io moves to CCL package
- ;; 09/05/91 bill Removed last vestige of LAP
-
- (in-package :ccl)
-
- (eval-when (:compile-toplevel :execute :load-toplevel)
- (require :mac-file-io)
- )
-
- (defstruct bm-tables
- string ; the search string as a vector of character codes
- len ; the length of the search string
- match ; index in string -> shift for last chars match
- mismatch ; char -> shift for last char mismatch
- )
-
- (defun compute-bm-tables (string &optional case-matters)
- (setq string (ensure-simple-string (if case-matters string (string-upcase string))))
- (let* ((len (length string))
- (len-1 (1- len))
- (len-2 (1- len-1))
- (mismatch (make-array 256 :element-type t :initial-element len))
- (match (make-array (max 0 len-1) :element-type t))
- (pred (if case-matters #'char= #'char-equal)))
- (declare (fixnum len len-1 len-2))
- ; compute mismatch table.
- ; mismatch[i] = how far to shift if there is a mismatch on the first
- ; compare (with string[len-1]) and the character in the text is (code-char i)
- (dotimes (i len-1)
- (declare (fixnum i))
- (setf (aref mismatch (char-code (schar string i))) (- len-1 i)))
- ; Compute match table
- ; match[i] = how far to shift if there is a mismatch in the ith position
- ; of the search string (i < len-1).
- (dotimes (i len-1) ; i is mismatch position
- (declare (fixnum i))
- (setf (aref match i)
- (block match
- (do ((end len-2 (1- end)))
- ((< end 0) len)
- (declare (fixnum end))
- (do ((j len-1 (1- j))
- (k end (1- k)))
- ((< k 0)
- (return-from match (- len-1 end)))
- (declare (fixnum j k))
- (when (eql j i)
- (if (not (funcall pred (schar string j) (schar string k)))
- (return-from match (- len-1 end))
- (return)))
- (unless (funcall pred (schar string j) (schar string k))
- (return)))))))
- (make-bm-tables :string (map 'vector #'char-code string) :len len :mismatch mismatch :match match)))
-
- (defmacro %char-code-upcase (char-code)
- (let ((c (gensym)))
- `(the fixnum
- (let ((,c ,char-code))
- (declare (fixnum ,c))
- (if (and (<= (char-code #\a) ,c)
- (<= ,c (char-code #\z)))
- (the fixnum (+ ,c (- (char-code #\A) (char-code #\a))))
- ,c)))))
-
- ; Search array from start to end for the string in the bm-tables descriptor
- (defun bm-search-array (bm-tables array start end)
- (declare (fixnum start end)
- (type macptr array))
- (declare (optimize (speed 3) (safety 0)))
- (let* ((string (bm-tables-string bm-tables))
- (len (bm-tables-len bm-tables))
- (len-1 (1- len))
- (match (bm-tables-match bm-tables))
- (mismatch (bm-tables-mismatch bm-tables))
- (i (+ start len-1))
- (char-code 0))
- (declare (fixnum len len-1 i char-code))
- (macrolet ((array-ref (array index) `(%char-code-upcase (%get-unsigned-byte ,array ,index))))
- (loop
- (when (>= i end) (return nil))
- (let ((array-idx i)
- (string-idx len-1))
- (declare (fixnum array-idx string-idx))
- (if (not (eql (the fixnum (svref string string-idx))
- (setq char-code (array-ref array array-idx))))
- (incf i (the fixnum (svref mismatch char-code)))
- (loop
- (when (< (decf string-idx) 0)
- (return-from bm-search-array (the fixnum (- i len-1))))
- (decf array-idx)
- (when (not (eql (the fixnum (svref string string-idx))
- (array-ref array array-idx)))
- (return (the fixnum (incf i (the fixnum (svref match string-idx)))))))))))))
-
-
- (defconstant $bm-buffer-size 8192)
-
- (defun find-bm-tables-in-file (bm-tables file &optional found-function)
- (unless found-function
- (let (res)
- (setq found-function
- #'(lambda (pos)
- (if pos
- (push pos res)
- (prog1 (nreverse res) (setq res nil)))))))
- (with-FSopen-file (pb file)
- (let* ((len (bm-tables-len bm-tables))
- (len-1 (1- len))
- (buffer-size (+ $bm-buffer-size len-1))
- (size 0)
- (bytes-read 0)
- (base 0)
- (index 0))
- (declare (fixnum len buffer-size size bytes-read base))
- (%stack-block ((buf buffer-size))
- (with-macptrs ((buf+len-1 (%inc-ptr buf len-1))
- (buf+$bm-buffer-size (%inc-ptr buf $bm-buffer-size)))
- (setq bytes-read (setq size (fsread pb $bm-buffer-size buf)))
- (with-macptrs ((ptr (%inc-ptr buf+$bm-buffer-size (- len-1))))
- (#_BlockMove ptr buf+$bm-buffer-size len-1))
- (loop
- (when (> bytes-read 0)
- (setq index 0)
- (loop
- (if (setq index (bm-search-array bm-tables buf index size))
- (progn
- (unless (funcall found-function (+ base index))
- (return-from find-bm-tables-in-file nil))
- (setq index (the fixnum (1+ (the fixnum index)))))
- (return))))
- (when (< bytes-read $bm-buffer-size)
- (return (funcall found-function nil)))
- (incf base $bm-buffer-size)
- (#_BlockMove buf+$bm-buffer-size buf len-1)
- (setq bytes-read (fsread pb $bm-buffer-size buf+len-1))
- (setq size (+ len-1 bytes-read))))))))
-
-
- ; Call FOUND-FUNCTION with one arg, the position in the file, for each
- ; occurrence of STRING in FILE. Calls FOUND-FUNCTION with an arg of NIL when
- ; the last occurrence has been found, and returns the value as the value
- ; of BM-FIND-STRING-IN-FILE.
- ; If FOUND-FUNCTION returns NIL, return NIL from BM-FIND-STRING-IN-FILE.
- (defun bm-find-string-in-file (string file &optional found-function)
- (find-bm-tables-in-file (compute-bm-tables string) file found-function))
-
- ; Call FOUND-FUNCTION with two args, the file & the position in the file,
- ; for each occurrence of STRING in one of the FILES.
- ; If FOUND-FUNCTION returns NIL, go immediately to the next file.
- ; Otherwise, continue searching in the current file.
- ; Calls FOUND-FUNCTION with a second arg of T before starting to search each file
- ; and with a second arg of NIL at the end of searching each file.
- ; Calls FOUND-FUNCTION with a first arg of NIL when the search is all over.
- (defun bm-find-string-in-files (string files &optional found-function)
- (unless found-function
- (setq found-function
- (let (res one-file)
- #'(lambda (file pos)
- (cond ((eq file nil) (prog1 (nreverse res) (setq res nil)))
- ((eq pos t) (setq one-file nil))
- ((eq pos nil) (when one-file
- (push (cons file (nreverse one-file)) res)))
- (t (push pos one-file)))))))
- (let ((bm (compute-bm-tables string))
- search-file)
- (flet ((inner-found-function (pos)
- (funcall found-function search-file pos)))
- (declare (dynamic-extent inner-found-function))
- (dolist (file files)
- (setq search-file file)
- (funcall found-function file t)
- (find-bm-tables-in-file bm file #'inner-found-function))
- (funcall found-function nil nil))))
-
- (defun bm-find-string-in-dir (string dir &optional found-function)
- (bm-find-string-in-files string (directory dir) found-function))
-
- ;;;;;;;;;;;;;
- ;;
- ;; Upate MCL's "Search Files" command
- ;;
-
- (defvar *search-files-dialog* nil)
-
- ; Stub, so that redefining bm-di-dialog-file-search-internal won't
- ; require reevaluating the (setf (symbol-function 'do-dialog-file-search) ...) form.
- (defun bm-do-dialog-file-search (pathname string)
- (bm-do-dialog-file-search-internal pathname string))
-
- (defun bm-do-dialog-file-search-internal (pathname string)
- (let* ((dialog (select-item-from-list
- nil
- :window-title (format nil "Files containing ~s" string)
- :modeless t
- :action-function
- #'(lambda (list)
- (when list
- (maybe-start-isearch (ed (car list)) string)))))
- (*search-files-dialog* dialog)
- (sequence (car (subviews dialog 'sequence-dialog-item)))
- (button (default-button dialog))
- files)
- (catch dialog ; thrown to by window-close method below
- (set-table-sequence sequence (list (format nil "Finding ~s" pathname)))
- (unless (setq files (directory pathname :resolve-aliases t))
- (set-table-sequence sequence (list "No files correspond to:" pathname))
- (return-from bm-do-dialog-file-search-internal nil))
- (set-table-sequence sequence nil)
- (set-cell-font sequence #@(0 0) :italic)
- (set-table-sequence sequence (list (car files)))
- (flet ((f (file index)
- (without-interrupts
- (flet ((ensure-selected-cell (sequence new-cell)
- (let ((old-cell (first-selected-cell sequence)))
- (unless (eql new-cell old-cell)
- (when old-cell (cell-deselect sequence old-cell))
- (when new-cell (cell-select sequence new-cell))))))
- (cond ((null file)
- (set-cell-font sequence #@(0 0) nil)
- (let ((sel (first-selected-cell sequence)))
- (set-table-sequence
- sequence (cdr (table-sequence sequence)))
- (when sel
- (setq sel
- (if (eql sel #@(0 0))
- nil
- (make-point (point-h sel) (1- (point-v sel))))))
- (ensure-selected-cell sequence sel)))
- ((eq index t)
- (setf (car (table-sequence sequence)) file)
- (redraw-cell sequence #@(0 0)))
- (index
- (let ((sel (or (first-selected-cell sequence) #@(0 1))))
- (set-table-sequence
- sequence (nconc (table-sequence sequence) (list file)))
- (ensure-selected-cell sequence sel)
- (dialog-item-enable button)
- nil))
- (t nil))))))
- (declare (dynamic-extent #'f))
- (bm-find-string-in-files string files #'f)))))
-
- (defmethod window-close :after ((w select-dialog))
- (if (eq w *search-files-dialog*)
- (throw w nil)))
-
- (let ((*warn-if-redefine* nil)
- (*warn-if-redefine-kernel* nil))
- (setf (symbol-function 'do-dialog-file-search) #'bm-do-dialog-file-search))